# Direct outputs eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

options(
    scipen = 999,
    digits = 16,
    max.print = .Machine$integer.max,
    show.error.locations = TRUE,
    warn = 1
)

RNGkind("L'Ecuyer-CMRG")
seed <- 818675309L
set.seed(seed) # setting main seed

# BLS API calls require key (private; not eligible for public release)
bls_api_key <- "YOUR KEY GOES HERE"

# PACKAGES ---------------------------------------------------------------------

library(data.table)
library(rjson)
library(httr)
library(XML)
library(ggplot2)
library(ggh4x)

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
setDTthreads(threads = 1)
options(datatable.print.class = TRUE, datatable.print.keys = TRUE)
# so that printing the data.table also shows the variable type on top

# BEGIN FILE -------------------------------------------------------------------

# Just in case something masks 'source()', use extra deliberate base::source().
base::source("~/code/0-utility-functions/mathematical-utils.R", local = TRUE)

# PULL IN BLS CPI DATA FOR CPI ADJUSTMENT --------------------------------------
# Series for historical CPI-U (CUUR0000SA0)
# ==> can manually inspect here: <https://data.bls.gov/timeseries/CUUR0000SA0>

if (bls_api_key != "YOUR KEY GOES HERE") {

    # Place two calls to BLS API.
    # ==> BLS API requires continuous years (so can't do 1986 and 2005 only)
    # ==> a bit troublesome to work with the output of the 1986:2005 call

    bls_api_call1986_temp <-
        paste0(c(
            "{",
            sprintf("\"seriesid\":[\"%s\"],", "CUUR0000SA0"),
            sprintf("\"startyear\":%s,", 1986),
            sprintf("\"endyear\":%s,", 1986),
            sprintf("\"annualaverage\":\"%s\",", "true"),
            sprintf("\"registrationkey\":\"%s\"", bls_api_key),
            "}"
        ), collapse = "")

    bls_api_call2016_temp <-
        paste0(c(
            "{",
            sprintf("\"seriesid\":[\"%s\"],", "CUUR0000SA0"),
            sprintf("\"startyear\":%s,", 2016),
            sprintf("\"endyear\":%s,", 2016),
            sprintf("\"annualaverage\":\"%s\",", "true"),
            sprintf("\"registrationkey\":\"%s\"", bls_api_key),
            "}"
        ), collapse = "")

    get1986_temp <-
        POST(
            url = "https://api.bls.gov/publicAPI/v2/timeseries/data/",
            body = bls_api_call1986_temp,
            content_type_json()
        )

    cpi1986_base1980 <-
        fromJSON(rawToChar(get1986_temp$content))$Results$series[[1]]$data[[1]]$value # nolint

    get2016_temp <-
        POST(
            url = "https://api.bls.gov/publicAPI/v2/timeseries/data/",
            body = bls_api_call2016_temp,
            content_type_json()
        )

    cpi2016_base1980 <-
        fromJSON(rawToChar(get2016_temp$content))$Results$series[[1]]$data[[1]]$value # nolint

    cpi_conv_1986_to_2016 <-
        (as.numeric(cpi2016_base1980) / as.numeric(cpi1986_base1980))
    # (240.007 / 109.6) last time code was run
    # ==> just in case the CPI-U series is retroactively revised later

    # Housekeeping
    bls_api_call1986_temp <- NULL
    get1986_temp <- NULL
    cpi1986_base1980 <- NULL
    bls_api_call2016_temp <- NULL
    get2016_temp <- NULL
    cpi2016_base1980 <- NULL
    rm(
        bls_api_call1986_temp,
        get1986_temp,
        cpi1986_base1980,
        bls_api_call2016_temp,
        get2016_temp,
        cpi2016_base1980
    )
} else {
    # Just using the static values from last time code was run
    cpi_conv_1986_to_2016 <- (240.007 / 109.6)
}

# PULL IN WORLD BANK REAL INTEREST RATE ----------------------------------------
# Series for historical annual real interest rate (in %) (FR.INR.RINR)
# ==> can manually inspect here:
# ==> <https://data.worldbank.org/indicator/FR.INR.RINR?locations=US>

wb_api_url_base_temp <- "http://api.worldbank.org/v2/country/us/indicator"
wb_api_url_details_temp <- "FR.INR.RINR?date=1986:2005"
wb_api_url <- sprintf("%s/%s", wb_api_url_base_temp, wb_api_url_details_temp)

get_realrate_temp <-
    xmlToDataFrame(xmlParse(GET(url = wb_api_url, headers = accept_json())))
get_realrate_temp <- setDT(get_realrate_temp)
r_irs2001 <- mean(as.numeric(get_realrate_temp$value)) / 100
# 0.05082075559038537 in the relevant call
# ==> just in case the World Bank series is retroactively revised later

# Housekeeping (order seems to matter here for interactive code runs)
wb_api_url_base_temp <- NULL
wb_api_url_details_temp <- NULL
wb_api_url <- NULL
get_realrate_temp <- NULL
rm(
    wb_api_url_base_temp,
    wb_api_url_details_temp,
    wb_api_url,
    get_realrate_temp
)

# READ IN OUR DATA -------------------------------------------------------------

# Our prizes; requires individual level data, not eligible for public release.

# To construct our baseline estimation sample, we impose three restrictions:
# 1)  We require each winner to be of working age (21 and 64) in their win year.
# 2)  We require each winner to be in the sample for at least two years prior to
#     generating their first Form W-2G.
# ==> This ensures that we observe pre-win economic outcomes for each individual
# 3) We restrict our baseline estimation sample to wins of 30K+
# ==> Choice translates to economically-meaningful $1K per year for avg winner.

our_sample_all_ages <-
    readRDS("~/summary-output/our-distributions.rds")
our_sample_all_ages <- setDT(our_sample_all_ages)

# Just need to impose restriction 1) from above^
# ==> the variable 'age_case' let's us do this
our_sample <- our_sample_all_ages[age_case == 1]
our_sample[, id_var := .I]

# Rename variables to be more informative
setnames(our_sample, "L_multiperiod_nonequiv_pretax", "pretax_winner_winnings")
setnames(our_sample, "L_multiperiod", "posttax_peradult_winnings")
setnames(our_sample, "L_ann_multiperiod", "posttax_peradult_annuity")

# Housekeeping
our_sample_all_ages <- NULL
rm(our_sample_all_ages)

# READ IN IMBENS, RUBIN, AND SACERDOTE (2001) UNDERLYING DATA ------------------

# The dataset is taken from Li and Mueller (2021, Quantitative Economics)
# @source <https://qeconomics.org/ojs/index.php/qe/article/view/1577>
# @format A data.table with 496 rows and 77 variables
# \describe{
#   \item{yearlpr}{Yearly prize, in $1000 (time invariant in data); 0 denotes "nonwinners"} #nolint
#   \item{yearw}{Year won (time invariant in data)}
#   \item{tixbot}{Tickets bought (time invariant in data)}
#   \item{agew}{Age of the winner in the win year (time invariant in data)}
#   \item{agewgt55}{Binary indicator, agew is greater than 55 (time invariant in data)} #nolint
#   \item{agewgt65}{Binary indicator, agew is greater than 65 (time invariant in data)} #nolint
#   \item{male}{Binary indicator, sex of the winner is male (time invariant in data)} #nolint
#   \item{educ}{Years of schooling (time invariant in data)}
#   \item{college}{College attendance (time invariant in data)}
#   \item{collegedegree}{College degree (time invariant in data)}
#   \item{workthen}{Working then (time invariant in data)}
#   \item{xearn1}{Earnings in year -6, in $1000}
#   \item{xearn2}{Earnings in year -5, in $1000}
#   \item{xearn3}{Earnings in year -4, in $1000}
#   \item{xearn4}{Earnings in year -3, in $1000}
#   \item{xearn5}{Earnings in year -2, in $1000}
#   \item{xearn6}{Earnings in year -1, in $1000}
#   \item{yearn1}{Earnings in year 0, in $1000}
#   \item{yearn2}{Earnings in year +1, in $1000}
#   \item{yearn3}{Earnings in year +2, in $1000}
#   \item{yearn4}{Earnings in year +3, in $1000}
#   \item{yearn5}{Earnings in year +4, in $1000}
#   \item{yearn6}{Earnings in year +5, in $1000}
#   \item{yearn7}{Earnings in year +6, in $1000}
#   \item{xearnp1}{Positive earnings in year -6}
#   \item{xearnp2}{Positive earnings in year -5}
#   \item{xearnp3}{Positive earnings in year -4}
#   \item{xearnp4}{Positive earnings in year -3}
#   \item{xearnp5}{Positive earnings in year -2}
#   \item{xearnp6}{Positive earnings in year -1}
#   \item{yearnp1}{Positive earnings in year 0}
#   \item{yearnp2}{Positive earnings in year +1}
#   \item{yearnp3}{Positive earnings in year +2}
#   \item{yearnp4}{Positive earnings in year +3}
#   \item{yearnp5}{Positive earnings in year +4}
#   \item{yearnp6}{Positive earnings in year +5}
#   \item{yearnp7}{Positive earnings in year +6}
#   \item{winner}{Indicator for lottery winner}
#   \item{bigwinner}{Indicator for an annual prize of at least 100K}
# }

# dictionaries
irs2001_dict <-
    c(
        "yearlpr" = "Yearly prize",
        "yearnavg7" = "Average",
        "yearn1" = "Year 0",
        "yearn2" = "Year 1",
        "yearn3" = "Year 2",
        "yearn4" = "Year 3",
        "yearn5" = "Year 4",
        "yearn6" = "Year 5",
        "yearn7" = "Year 6",
        "yearnavg7_dif" = "Average",
        "yearn1-xearn6" = "Year 0",
        "yearn2-xearn6" = "Year 1",
        "yearn3-xearn6" = "Year 2",
        "yearn4-xearn6" = "Year 3",
        "yearn5-xearn6" = "Year 4",
        "yearn6-xearn6" = "Year 5",
        "yearn7-xearn6" = "Year 6"
    )

# read data
irs2001_sample <-
    readRDS("~/external-data/imbens-rubin-sacerdote-2001-data.rds")

irs2001_sample[, id_var := .I]

# type management -- some numeric type should be converted to integer
integer_vars <-
    c(
        "male",
        "workthen",
        "tixbot",
        "yearw",
        "agew",
        "winner",
        "educ",
        "xearnp1",
        "xearnp2",
        "xearnp3",
        "xearnp4",
        "xearnp5",
        "xearnp6",
        "yearnp1",
        "yearnp2",
        "yearnp3",
        "yearnp4",
        "yearnp5",
        "yearnp6",
        "yearnp7",
        "bigwinner",
        "college",
        "collegedegree",
        "agewgt45",
        "agewgt55",
        "agewgt65",
        "interaction_group",
        "interaction_group_dummies1",
        "interaction_group_dummies2",
        "interaction_group_dummies3",
        "interaction_group_dummies4",
        "interaction_group_dummies5",
        "interaction_group_dummies6",
        "interaction_group_dummies7",
        "interaction_group_dummies8",
        "interaction_group_dummies9",
        "interaction_group_dummies10",
        "interaction_group_dummies11",
        "interaction_group_dummies12",
        "interaction_group_dummies13",
        "interaction_group_dummies14",
        "interaction_group_dummies15",
        "interaction_group_dummies16",
        "obs",
        "ones"
    )

# Type conversion
for (int_col in integer_vars) {
    set(irs2001_sample,
        j = int_col,
        value = as.integer(irs2001_sample[[int_col]])
    )
}

irs2001_sample[, pretax_winner_annual_pmt := yearlpr * 1000]

# Categorize two key IRS2001 estimation samples (taken directly from IRS2001)
# ==>   Full Winners Sample -- all winners in the IRS2001 with annual payouts
# ==>   Sample Excluding Large Winners -- subset of Full Winners Sample with
#       annual payout of < 100K
exclude_large_cond <-
    "as.integer(pretax_winner_annual_pmt > 0 & pretax_winner_annual_pmt < 100000)" # nolint
irs2001_sample <- irs2001_sample[pretax_winner_annual_pmt > 0]
irs2001_sample[, exclude_large_sample := eval(parse(text = exclude_large_cond))]

# CONSTRUCT VARIABLES FOR DISTRIBUTION COMPARISON ------------------------------

# For IRS2001, need to construct: pre-tax winner / HH winnings in 2016 dollars

# IRS2001 already have units of pre-tax winner / HH, but annual payments
# Bottom of page 786 of IRS2001 states the standard NPV of:
# \sum_{t=1}^20  ((pretax_winner_annual_pmt) / (1 + r_0)^t)
# but this assumes payments start the following year, although they really start
# right away so we use:
# \sum_{t=0}^19  ((pretax_winner_annual_pmt) / (1 + r_0)^t)
# which is equal to (partial sum formula):
# (pretax_winner_annual_pmt) *
# ( 1 - ( (1 / (1 + r_0))^(20) ) ) / ( 1 - (1 / (1 + r_0)) )

npv_per_dollar <- npv_factor(r_rate = r_irs2001, t_time = 20)

irs2001_sample[
    ,
    pretax_winner_winnings :=
        (pretax_winner_annual_pmt * npv_per_dollar * cpi_conv_1986_to_2016)
]

# Report a few numbers used in the paper
median_irs2001_excl_lrg <-
    median(irs2001_sample[exclude_large_sample == 1]$pretax_winner_winnings)
share_200k_irs2001_excl_lrg <-
    dim(
        irs2001_sample[
            exclude_large_sample == 1 &
                pretax_winner_winnings > 200000
        ]
    )[1] /
        dim(irs2001_sample[exclude_large_sample == 1])[1]

print(
    sprintf(
        "Even in the estimation sample that excludes the biggest winners, the median prize is $%s and the vast majority (%s percent of the sample) has prize amounts over $200,000.", # nolint
        round(median_irs2001_excl_lrg, digits = -3),
        round(share_200k_irs2001_excl_lrg * 100, digits = 0) # nolint
    )
)

footnote_irs2001_100k <- (100000 * npv_per_dollar * cpi_conv_1986_to_2016)
share_ours_beyond_100k <-
    dim(our_sample[pretax_winner_winnings > footnote_irs2001_100k])[1] /
        dim(our_sample)[1]

print(
    sprintf(
        "Imbens, Rubin, and Sacerdote (2001) define the biggest winners as those with wins over $100,000 (in 1986 dollars) annually over 20 years. When we convert that to pre-tax winnings and to 2016 dollars, these correspond to winnings of approximately $%s million dollars. About %s percent of winnings in our sample fall into this category.", # nolint
        round(footnote_irs2001_100k / 1000000, digits = 1),
        round(share_ours_beyond_100k * 100, digits = 0) # nolint
    )
)

median_our <-
    median(our_sample$pretax_winner_winnings)
share_200k_our <-
    dim(our_sample[pretax_winner_winnings > 200000])[1] /
        dim(our_sample)[1]

print(
    sprintf(
        "By comparison, the median pre-tax winning is merely $%s in our data, and only %s percent of pre-tax winnings exceed $200,000.", # nolint
        round(median_our, digits = -3),
        round(share_200k_our * 100, digits = 0) # nolint
    )
)

# Housekeeping
irs2001_dict <- NULL
integer_vars <- NULL
int_col <- NULL
r_irs2001 <- NULL
exclude_large_cond <- NULL
rm(
    irs2001_dict,
    integer_vars,
    int_col,
    r_irs2001,
    exclude_large_cond
)

# FIGURE B.2: COMPARISON OF PRE-TAX WINNINGS DISTRIBUTIONS ---------------------

# Convert to units of 000s for readability in a figure.
our_distrib_000s <-
    data.table(
        support_point = copy(our_sample$pretax_winner_winnings) / 1000
    )

irs2001_full_distrib_000s <-
    data.table(
        support_point = copy(irs2001_sample$pretax_winner_winnings) / 1000
    )

irs2001_excl_lrg_distrib_000s <-
    data.table(
        support_point = copy(irs2001_sample[exclude_large_sample == 1]$pretax_winner_winnings) / 1000 # nolint
    )

# Add palette.
color_palette <- c("#000000", "#440154FF", "#E69F00")

# Let's just calculate the sample share in each $10K bin
# Our sample starts at 30K, theirs at ~32K
# Their Sample Excluding Large Winners ends at ~$3M (3,000K)

our_distrib_000s[, bin_val := as.integer(NA)]
irs2001_full_distrib_000s[, bin_val := as.integer(NA)]
irs2001_excl_lrg_distrib_000s[, bin_val := as.integer(NA)]

endpoints <- seq.int(from = 0, to = 4000, by = 10)
endpoints_skeleton <-
    data.table(bin_val = seq.int(from = 0, to = 4000, by = 10))

for (ee in sort(endpoints, decreasing = TRUE)) {
    our_distrib_000s[support_point < ee, bin_val := as.integer(ee)]
    irs2001_full_distrib_000s[support_point < ee, bin_val := as.integer(ee)]
    irs2001_excl_lrg_distrib_000s[support_point < ee, bin_val := as.integer(ee)]
}
ee <- NULL
rm(ee)

# One bin for the largest prizes (just to make sure we integrate to one)
our_distrib_000s[is.na(bin_val), bin_val := 9999]
irs2001_full_distrib_000s[is.na(bin_val), bin_val := 9999]
irs2001_excl_lrg_distrib_000s[is.na(bin_val), bin_val := 9999]

our_distrib_000s_bin <-
    our_distrib_000s[
        ,
        .(bin_share = .N / dim(our_distrib_000s)[1]),
        by = .(bin_val)
    ]

our_distrib_000s_bin <-
    merge(
        endpoints_skeleton,
        our_distrib_000s_bin,
        by = "bin_val",
        all.x = TRUE,
        sort = FALSE
    )
our_distrib_000s_bin[is.na(bin_share), bin_share := 0]
our_distrib_000s_bin[, broad_grouping := "Our data"]
our_distrib_000s_bin[, distribution_label := "Our data"]

irs2001_full_distrib_000s_bin <-
    irs2001_full_distrib_000s[
        ,
        .(bin_share = .N / dim(irs2001_full_distrib_000s)[1]),
        by = .(bin_val)
    ]

irs2001_full_distrib_000s_bin <-
    merge(
        endpoints_skeleton,
        irs2001_full_distrib_000s_bin,
        by = "bin_val",
        all.x = TRUE,
        sort = FALSE
    )
irs2001_full_distrib_000s_bin[is.na(bin_share), bin_share := 0]
irs2001_full_distrib_000s_bin[
    ,
    broad_grouping := "Imbens, Rubin, and Sacerdote (2001)"
]
irs2001_full_distrib_000s_bin[
    ,
    distribution_label := "Full Winners Sample"
]

irs2001_excl_lrg_distrib_000s_bin <-
    irs2001_excl_lrg_distrib_000s[
        ,
        .(bin_share = .N / dim(irs2001_excl_lrg_distrib_000s)[1]),
        by = .(bin_val)
    ]

irs2001_excl_lrg_distrib_000s_bin <-
    merge(
        endpoints_skeleton,
        irs2001_excl_lrg_distrib_000s_bin,
        by = "bin_val",
        all.x = TRUE,
        sort = FALSE
    )
irs2001_excl_lrg_distrib_000s_bin[is.na(bin_share), bin_share := 0]
irs2001_excl_lrg_distrib_000s_bin[
    ,
    broad_grouping := "Imbens, Rubin, and Sacerdote (2001)"
]
irs2001_excl_lrg_distrib_000s_bin[
    ,
    distribution_label := "Sample Excluding Large Winners"
]

bin_data <-
    rbindlist(
        list(
            our_distrib_000s_bin,
            irs2001_excl_lrg_distrib_000s_bin,
            irs2001_full_distrib_000s_bin
        ),
        use.names = TRUE
    )

bin_data[
    ,
    broad_grouping :=
        factor(
            broad_grouping,
            levels = c("Our data", "Imbens, Rubin, and Sacerdote (2001)")
        )
]

bin_data[, distribution_label :=
    factor(
        distribution_label,
        levels = c(
            "Our data",
            "Sample Excluding Large Winners",
            "Full Winners Sample"
        )
    )]

# Housekeeping
our_distrib_000s <- NULL
our_distrib_000s_bin <- NULL
irs2001_full_distrib_000s <- NULL
irs2001_full_distrib_000s_bin <- NULL
irs2001_excl_lrg_distrib_000s <- NULL
irs2001_excl_lrg_distrib_000s_bin <- NULL
endpoints <- NULL
endpoints_skeleton <- NULL
rm(
    our_distrib_000s,
    our_distrib_000s_bin,
    irs2001_full_distrib_000s,
    irs2001_full_distrib_000s_bin,
    irs2001_excl_lrg_distrib_000s,
    irs2001_excl_lrg_distrib_000s_bin,
    endpoints,
    endpoints_skeleton
)

dist_fig <-
    ggplot(
        data = bin_data,
        aes(
            x = bin_val,
            y = bin_share,
            fill = distribution_label,
            color = distribution_label
        )
    ) +
    geom_col(
        position = "identity",
        width = 20
    ) +
    ylab("Share of Sample") +
    xlab("Size of lottery win (in units of thousands)") +
    theme_bw(base_size = 10) +
    theme(
        panel.grid.minor = element_blank(),
        # remove actual gridlines
        axis.text = element_text(size = 8),
        legend.position = "none",
        legend.title = element_blank(),
        axis.title.y = element_text(margin = margin(
            t = 10,
            r = 10,
            b = 2.5,
            l = 2.5
        )),
        axis.title.x = element_text(margin = margin(
            t = 10,
            r = 10,
            b = 2.5,
            l = 2.5
        ))
    ) +
    scale_fill_manual(values = alpha(color_palette, alpha = c(1, 1, 0.6))) +
    scale_color_manual(values = alpha(color_palette, alpha = c(1, 1, 0.6))) +
    scale_x_continuous(
        breaks = seq.int(
            from = 0,
            to = 4000,
            by = 200
        ),
        labels = seq.int(
            from = 0,
            to = 4000,
            by = 200
        ),
        expand = expansion(mult = c(0.005, 0.0125)),
        guide = guide_axis(n.dodge = 2)
    ) +
    facet_wrap(~broad_grouping, nrow = 2, scales = "free_y") +
    coord_cartesian(
        xlim = c(
            0,
            4000
        ),
        clip = "on"
    ) +
    facetted_pos_scales(y = list(
        scale_y_continuous(
            breaks = (seq.int(
                from = 0, to = 20, by = 5
            ) / 100),
            labels = (seq.int(
                from = 0, to = 20, by = 5
            ) / 100),
            limits = c(0, 0.20),
            expand = expansion(mult = c(0.01, 0.0075))
        ),
        scale_y_continuous(
            breaks = (seq.int(
                from = 0, to = 100, by = 25
            ) / 1000),
            labels = (seq.int(
                from = 0, to = 100, by = 25
            ) / 1000),
            limits = c(0, 0.10),
            expand = expansion(mult = c(0.01, 0.0075))
        )
    ))

# Make a helper table for the labels for each distribution of weights.
line_label_data <-
    data.table(
        broad_grouping = c(
            "Our data",
            "Imbens, Rubin, and Sacerdote (2001)",
            "Imbens, Rubin, and Sacerdote (2001)"
        ),
        distribution_label = c(
            "Our data",
            "Sample Excluding Large Winners",
            "Full Winners Sample"
        ),
        bin_val = c(
            401,
            901,
            3401
        ),
        bin_share = c(
            0.05,
            0.0625,
            0.025
        )
    )

line_label_data[
    ,
    broad_grouping :=
        factor(
            broad_grouping,
            levels = c("Our data", "Imbens, Rubin, and Sacerdote (2001)")
        )
]

line_label_data[, distribution_label := factor(
    distribution_label,
    levels = c(
        "Our data",
        "Sample Excluding Large Winners",
        "Full Winners Sample"
    )
)]

dist_fig <-
    dist_fig +
    geom_label(
        data = line_label_data[broad_grouping != "Our data"],
        aes(x = bin_val, y = bin_share, label = distribution_label),
        size = 2.8,
        fill = "white"
    )

ggsave(
    plot = dist_fig,
    filename = "~/paper/figures/figure-B.2.png",
    width = 6,
    height = 5,
    dpi = 600
)

ggsave(
    plot = dist_fig,
    filename = "~/paper/figures/figure-B.2.tif",
    width = 6,
    height = 5,
    device = "tiff",
    dpi = 600
)

# Housekeeping
bls_api_key <- NULL
cpi_conv_1986_to_2016 <- NULL
our_sample <- NULL
irs2001_sample <- NULL
npv_per_dollar <- NULL
median_irs2001_excl_lrg <- NULL
share_200k_irs2001_excl_lrg <- NULL
footnote_irs2001_100k <- NULL
share_ours_beyond_100k <- NULL
median_our <- NULL
share_200k_our <- NULL
color_palette <- NULL
bin_data <- NULL
line_label_data <- NULL
dist_fig <- NULL
rm(
    bls_api_key,
    cpi_conv_1986_to_2016,
    our_sample,
    irs2001_sample,
    npv_per_dollar,
    median_irs2001_excl_lrg,
    share_200k_irs2001_excl_lrg,
    footnote_irs2001_100k,
    share_ours_beyond_100k,
    median_our,
    share_200k_our,
    color_palette,
    bin_data,
    line_label_data,
    dist_fig
)